## Categorical Color Map from Fabio
import random
cm_data = [[0.0051932, 0.098238, 0.34984],
[0.98135, 0.80041, 0.98127],
[0.51125, 0.5109, 0.1933],
[0.1333, 0.37528, 0.3794],
[0.94661, 0.61422, 0.41977],
[0.066899, 0.26319, 0.37759],
[0.9929, 0.70485, 0.70411],
[0.30238, 0.45028, 0.30012],
[0.75427, 0.56503, 0.21176],
[0.40297, 0.48047, 0.24473],
[0.98909, 0.75097, 0.83798],
[0.63151, 0.54075, 0.17007],
[0.98757, 0.65842, 0.56623],
[0.20908, 0.41741, 0.34968],
[0.049378, 0.19108, 0.36581],
[0.088353, 0.32217, 0.38473],
[0.85875, 0.58444, 0.29557],
[0.97342, 0.63518, 0.49255],
[0.16795, 0.39789, 0.36778],
[0.57002, 0.52619, 0.17527],
[0.35198, 0.46544, 0.27249],
[0.10684, 0.34977, 0.38455],
[0.99137, 0.72761, 0.77027],
[0.69372, 0.5538, 0.18261],
[0.032053, 0.14677, 0.35824],
[0.075833, 0.29332, 0.38192],
[0.25445, 0.43453, 0.32643],
[0.90724, 0.59732, 0.35314],
[0.059164, 0.22984, 0.37225],
[0.98598, 0.77527, 0.90845],
[0.99259, 0.68191, 0.63687],
[0.45577, 0.49558, 0.21777],
[0.81169, 0.57519, 0.25257],
[0.66269, 0.5475, 0.17404],
[0.98767, 0.763, 0.87286],
[0.18789, 0.408, 0.35948],
[0.99226, 0.71621, 0.73715],
[0.019936, 0.12298, 0.35412],
[0.42909, 0.48801, 0.2311],
[0.071115, 0.2785, 0.37989],
[0.23136, 0.4262, 0.33857],
[0.081553, 0.30786, 0.3836],
[0.78342, 0.57016, 0.23096],
[0.88388, 0.59045, 0.32319],
[0.27817, 0.44252, 0.31355],
[0.054721, 0.21123, 0.36918],
[0.042104, 0.16956, 0.36215],
[0.063071, 0.24709, 0.37505],
[0.11899, 0.36285, 0.38271],
[0.096618, 0.33616, 0.38513],
[0.98391, 0.78776, 0.94463],
[0.98192, 0.64666, 0.5296],
[0.99093, 0.67023, 0.60203],
[0.14971, 0.38698, 0.37445],
[0.37729, 0.47295, 0.25859],
[0.54023, 0.51858, 0.1831],
[0.99031, 0.73918, 0.80381],
[0.99311, 0.69345, 0.67081],
[0.9617, 0.62428, 0.4557],
[0.48312, 0.50322, 0.20504],
[0.60052, 0.5336, 0.17065],
[0.92832, 0.60521, 0.3854],
[0.32701, 0.4579, 0.28638],
[0.72432, 0.55963, 0.19541],
[0.83227, 0.57903, 0.27021],
[0.97805, 0.64087, 0.51109],
[0.99307, 0.69916, 0.68752],
[0.95457, 0.61914, 0.43758],
[0.36459, 0.46921, 0.26554],
[0.22011, 0.42186, 0.34426],
[0.045905, 0.18046, 0.36401],
[0.9895, 0.66433, 0.58425],
[0.061052, 0.23862, 0.37369],
[0.012963, 0.11078, 0.35199],
[0.15862, 0.39253, 0.37132],
[0.98841, 0.75695, 0.85533],
[0.79767, 0.57268, 0.24149],
[0.31465, 0.45411, 0.29328],
[0.99297, 0.68771, 0.65393],
[0.76894, 0.56763, 0.22105],
[0.6471, 0.54418, 0.17146],
[0.39007, 0.47671, 0.25166],
[0.985, 0.7815, 0.92647],
[0.98507, 0.65252, 0.548],
[0.026291, 0.13504, 0.35621],
[0.078517, 0.30062, 0.38281],
[0.46937, 0.49939, 0.21132],
[0.5852, 0.52993, 0.17249],
[0.14126, 0.38124, 0.37713],
[0.41597, 0.48423, 0.23789],
[0.29021, 0.44642, 0.30689],
[0.07344, 0.28594, 0.38096],
[0.98687, 0.7691, 0.89057],
[0.24279, 0.43042, 0.33263],
[0.99194, 0.67609, 0.61958],
[0.052164, 0.20132, 0.36752],
[0.44237, 0.4918, 0.22435],
[0.064936, 0.25526, 0.37636],
[0.19831, 0.4128, 0.35477],
[0.98972, 0.74504, 0.8208]]
## Convert 50 randomly chosen colours of the map above into '#rrggbb' strings.
## NOTE: random.sample() is unseeded, so the chosen colours (and their order)
## differ from run to run.
cm_hex = []
cm_data_samples = random.sample( cm_data, 50 )
for rgb in cm_data_samples:
    ## Channels are floats in [0, 1]; the 2*256 factor is kept from the original
    ## (presumably to brighten the palette). Clamp to 255 so every channel fits
    ## in two hex digits: without the clamp any channel >= 0.5 overflowed into
    ## three digits and the result was not a valid colour string.
    cm_hex.append( '#%02x%02x%02x' % tuple( min( 255, int( cc*2*256 ) ) for cc in rgb ) )
len( cm_hex )
50
This notebook is our shared workspace for the GDELT-IR project in the IR course.
##TODO: add auto data source download.
## The data is described in the report.
import numpy as np
## UMAP for clustering
try:
# import umap
import umap.umap_ as umap # Workaround for: AttributeError: module 'umap' has no attribute 'UMAP'
except:
print( "UMAP not installed, installing...")
!pip install umap-learn
import umap.umap_ as umap # Workaround for: AttributeError: module 'umap' has no attribute 'UMAP'
## PyEChart for interactive charts
try:
import pyecharts
except:
print( "pyecharts not installed, installing...")
!pip install pyecharts
import pyecharts.options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.charts import Scatter
import pyecharts.options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.charts import Scatter
from IPython.display import HTML
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
##WARNING: Large file, could take a long time to load.
## Load one pre-sampled GDELT parquet file; exactly one read_parquet line should be active at a time.
## NOTE(review): the frame is called `df_earthquake`, but the active file is the COVID (Nov 2020) sample -- the name looks like a leftover.
df_earthquake = pd.read_parquet( "/content/drive/MyDrive/gdelt-ir/gdelt-data/gdelt-data/gsg_2020-11-16_2020-11-19_20_perc.parquet.br" ) # 0.04 COVID Nov
#df_earthquake = pd.read_parquet( "/content/drive/MyDrive/gdelt-ir/gdelt-data/gsg_2022-02-23_2022-02-26_20_perc.parquet.br" ) # UKRAINIAN WAR
#df_earthquake = pd.read_parquet( "/content/drive/MyDrive/gdelt-ir/gdelt-data/gsg_2024-04-13_2024-04-16_20_perc.parquet.br" ) # Iran - Israel
#df_earthquake = pd.read_parquet( "/content/drive/MyDrive/gdelt-ir/gdelt-data/gsg_2024-04-30_2024-05-03_20_perc.parquet.br" ) # Palestinian protest
df_earthquake
| date | docembed | lang | model | title | url | |
|---|---|---|---|---|---|---|
| 0 | 2020-11-18 16:47:24 | [-0.04867898, -0.022968698, -0.054670326, -0.0... | ENGLISH | USEv4 | More than 3 million people in U.S. estimated t... | http://www.msn.com/en-us/news/us/more-than-3-m... |
| 1 | 2020-11-18 16:47:24 | [-0.056610804, 0.029553615, -0.016117932, -0.0... | ENGLISH | USEv4 | Europe has half of world's 4M new virus cases ... | https://qctimes.com/news/national/europe-has-h... |
| 2 | 2020-11-18 16:47:26 | [-0.03341733, -0.05604092, -0.04855904, -0.017... | ENGLISH | USEv4 | Bolivia May Boost Energy Exports Under New Gov... | https://oilprice.com/Energy/Energy-General/Bol... |
| 3 | 2020-11-18 16:47:24 | [0.030666357, -0.05313263, -0.052459653, -0.04... | ENGLISH | USEv4 | Biden's DIY transition proceeds without Trump ... | https://www.ctvnews.ca/world/america-votes/bid... |
| 4 | 2020-11-18 16:47:24 | [-0.008952097, 0.026652798, -0.048576683, 0.06... | ENGLISH | USEv4 | Corvallis, Benton County limit access | https://democratherald.com/news/local/corvalli... |
| ... | ... | ... | ... | ... | ... | ... |
| 237143 | 2020-11-16 07:03:27 | [-0.022987098, -0.052931678, -0.05310599, -0.0... | ENGLISH | USEv4 | Success! SpaceX Just Launched 4 Astronauts Int... | https://www.sciencealert.com/success-spacex-ju... |
| 237144 | 2020-11-16 07:03:23 | [-0.05586063, -0.0502217, 0.018631091, 0.02043... | ENGLISH | USEv4 | Coast to Coast AM with George Noory - NewsTalk... | https://kwhn.iheart.com/featured/coast-to-coas... |
| 237145 | 2020-11-16 07:03:25 | [-0.010536348, -0.058810823, -0.040461577, 0.0... | ENGLISH | USEv4 | Amnesty International: Opposition Politicians ... | https://www.voanews.com/africa/amnesty-interna... |
| 237146 | 2020-11-16 07:03:37 | [-0.05657771, -0.045112222, 0.065960735, 0.013... | ENGLISH | USEv4 | Joe Meno | TriQuarterly | https://www.triquarterly.org/contributors/joe-... |
| 237147 | 2020-11-16 07:03:44 | [-0.04426194, 0.015356476, -0.05218221, -0.047... | ENGLISH | USEv4 | Tropical Storm Iota strengthens as it approach... | https://ca.reuters.com/article/us-storm-iota-i... |
237148 rows × 6 columns
## Keep only the English articles and draw a random subsample of them.
## The sample is unseeded, so the selected rows differ between runs.
## Fractions used with the other datasets:
##   UKRAINIAN WAR: frac = 0.06 | Iran - Israel: frac = 0.15 | Palestinian protest: frac = 0.15
##   all languages: df_earthquake.sample( frac = 0.03 )
df_earthquake_mini = df_earthquake[ df_earthquake[ "lang" ] == "ENGLISH" ].sample( frac = 0.04 ) # COVID Nov
df_earthquake_mini
| date | docembed | lang | model | title | url | |
|---|---|---|---|---|---|---|
| 218345 | 2020-11-17 04:48:07 | [-0.045669183, -0.05347452, -0.050438065, -0.0... | ENGLISH | USEv4 | Pfizer, Moderna vaccines: How they compare | https://www.northernstar.com.au/news/pfizer-mo... |
| 4764 | 2020-11-18 12:20:16 | [-0.055367656, 0.0391054, 0.05972394, 0.027645... | ENGLISH | USEv4 | CBeebies reveals details of socially-distanced... | https://www.prolificnorth.co.uk/news/broadcast... |
| 197339 | 2020-11-16 18:47:31 | [-0.032248236, -0.06947365, -0.037009537, -0.0... | ENGLISH | USEv4 | What is life insurance and how does it work? | https://moneyfacts.co.uk/guides/money-guides/w... |
| 84960 | 2020-11-18 21:48:33 | [-0.044243604, -0.053840075, 0.037829816, -0.0... | ENGLISH | USEv4 | Why there's a mismatch between funding for Nig... | https://theconversation.com/why-theres-a-misma... |
| 69019 | 2020-11-16 11:47:49 | [-0.007065032, -0.05157483, -0.055889618, -0.0... | ENGLISH | USEv4 | Federal judge rules acting DHS head Chad Wolf ... | https://www.nbcnews.com/politics/immigration/f... |
| ... | ... | ... | ... | ... | ... | ... |
| 196056 | 2020-11-17 01:32:04 | [-0.051334087, -0.049046136, -0.05843137, -0.0... | ENGLISH | USEv4 | Supreme Court denies request from geriatric pr... | https://www.wjbc.com/news/supreme-court-denies... |
| 87176 | 2020-11-18 22:18:04 | [-0.014992134, 0.054903787, -0.047618452, 0.03... | ENGLISH | USEv4 | The quickest ways to deice and demist your car... | https://www.glasgowsouthandeastwoodextra.co.uk... |
| 168185 | 2020-11-16 21:33:21 | [-0.053530592, -0.0634055, -0.0010489243, 0.03... | ENGLISH | USEv4 | Naspers forecasts dip in earnings due to reduc... | https://www.news24.com/fin24/companies/naspers... |
| 81150 | 2020-11-16 12:32:56 | [-0.0027540403, -0.052914828, 0.052877605, 0.0... | ENGLISH | USEv4 | INFORMATION MANAGEMENT (IM)OFFICER | https://reliefweb.int/job/3687997/information-... |
| 158366 | 2020-11-18 06:46:58 | [0.0385711, -0.006112346, 0.05754666, -0.00636... | ENGLISH | USEv4 | 20 Daily Struggles Women With Big Butts Unders... | https://www.lolwot.com/20-daily-struggles-wome... |
5882 rows × 6 columns
## One embedding vector per sampled article, in DataFrame row order.
emb_earthquake_mini = df_earthquake_mini[ "docembed" ].tolist()
len( emb_earthquake_mini )
5882
## Project the document embeddings down to 2-D with UMAP, using cosine distance.
## NOTE(review): random_state is commented out below, so the layout is presumably not reproducible between runs.
umap_reducer = umap.UMAP(
n_neighbors = 50,
negative_sample_rate = 15,
repulsion_strength = 0.05,
metric = 'cosine',
n_components = 2, # 2-D output, used directly as chart coordinates
# n_epochs = 200,
min_dist = 0.0,
spread = 3,
transform_queue_size = 5,
# negative_sample_rate = 5,
# random_state=123456789
verbose = True
)
## fit_transform yields one 2-D point per input embedding (see the shape printed below).
emb_2d_earthquake_mini = umap_reducer.fit_transform( emb_earthquake_mini )
emb_2d_earthquake_mini.shape
UMAP(angular_rp_forest=True, metric='cosine', min_dist=0.0, n_neighbors=50, negative_sample_rate=15, repulsion_strength=0.05, spread=3, transform_queue_size=5, verbose=True) Wed Jun 12 19:24:12 2024 Construct fuzzy simplicial set Wed Jun 12 19:24:12 2024 Finding Nearest Neighbors Wed Jun 12 19:24:12 2024 Building RP forest with 9 trees Wed Jun 12 19:24:12 2024 NN descent for 13 iterations 1 / 13 2 / 13 3 / 13 4 / 13 Stopping threshold met -- exiting after 4 iterations Wed Jun 12 19:24:24 2024 Finished Nearest Neighbor Search Wed Jun 12 19:24:27 2024 Construct embedding
completed 0 / 500 epochs completed 50 / 500 epochs completed 100 / 500 epochs completed 150 / 500 epochs completed 200 / 500 epochs completed 250 / 500 epochs completed 300 / 500 epochs completed 350 / 500 epochs completed 400 / 500 epochs completed 450 / 500 epochs Wed Jun 12 19:24:44 2024 Finished embedding
(5882, 2)
# !pip install pacmap
# import pacmap
# # initializing the pacmap instance
# # Setting n_neighbors to "None" leads to a default choice shown below in "parameter" section
# pacmap_reducer = pacmap.PaCMAP(
# n_components = 2,
# n_neighbors = 10,
# MN_ratio = 0.5,
# FP_ratio = 2.0,
# distance="angular",
# # verbose = True
# )
# # fit the data (The index of transformed data corresponds to the index of the original data)
# emb_2d_earthquake_mini = pacmap_reducer.fit_transform( emb_earthquake_mini, init="pca" )
# emb_2d_earthquake_mini.shape
from sklearn.mixture import BayesianGaussianMixture
## Cluster the 2-D layout with a Bayesian Gaussian mixture and keep the
## component index assigned to every point.
## Options tried and left disabled: covariance_type = "diag",
## weight_concentration_prior_type = "dirichlet_distribution", reg_covar = 0.
bgm = BayesianGaussianMixture(
    n_components = 150,
    weight_concentration_prior = 100000,
    mean_precision_prior = 0.001,
    degrees_of_freedom_prior = 50,
    random_state = 123456780,
    max_iter = 1000,
    init_params = "k-means++",
    verbose = 2,
    verbose_interval = 50,
)
cluster_labels = bgm.fit_predict( emb_2d_earthquake_mini )
# ## Apply QuantileTransform before clustering
# qt = QuantileTransformer( n_quantiles = 200, random_state=0 )
# embeddings_norm = qt.fit_transform( cluster_earthquake_mini )
# cluster_labels = bgm.fit_predict(embeddings_norm)
Initialization 0 Iteration 50 time lapse 8.10095s ll change 9.51029 Iteration 100 time lapse 9.49196s ll change 6.63580 Iteration 150 time lapse 10.36477s ll change 2.89493 Iteration 200 time lapse 11.70570s ll change 0.01248 Iteration 250 time lapse 11.93063s ll change 0.03236 Iteration 300 time lapse 11.04866s ll change 0.02644 Iteration 350 time lapse 10.83622s ll change 0.13198 Iteration 400 time lapse 11.47385s ll change 0.09802 Initialization converged: True time lapse 90.98954s ll -23742.16686
## For every cluster: the largest number of articles falling into any single
## 12-hour window.
cluster_freq = {}
for cls in set( cluster_labels ):
    in_cluster = df_earthquake_mini[ cluster_labels == cls ]
    counts_per_window = in_cluster.set_index( "date" ).resample( "12H" )[ "url" ].count()
    cluster_freq[ cls ] = counts_per_window.max()
cluster_freq
## Broadcast the per-cluster value back onto every point.
point_freq = np.asarray( [cluster_freq[ label ] for label in cluster_labels] )
point_freq
array([44, 24, 40, ..., 47, 56, 24])
## Probability of each point under the mixture component it was assigned to:
## pick, row by row, the predict_proba column given by the point's label.
row_index = np.arange( len( emb_2d_earthquake_mini ) )
point_bgm_probs = bgm.predict_proba( emb_2d_earthquake_mini )[ row_index, cluster_labels ]
point_bgm_probs
array([0.9972915 , 0.94767394, 0.69545896, ..., 0.92668248, 0.98444224,
0.49436713])
## Spread of every cluster in the 2-D layout, measured as sqrt(det(cov)) of its
## points; a NaN result (e.g. from a degenerate covariance) is stored as 0.
cluster_bgm_areas = {}
for cls in set( cluster_labels ):
    members = emb_2d_earthquake_mini[ cluster_labels == cls ]
    spread = np.sqrt( np.linalg.det( np.cov( members[ :, 0 ], members[ :, 1 ] ) ) )
    cluster_bgm_areas[ cls ] = 0 if np.isnan( spread ) else spread
## Min-max normalisation of the areas; the float32 epsilon keeps the divisor
## non-zero when all areas are equal.
area_min = min( cluster_bgm_areas.values() )
area_max = max( cluster_bgm_areas.values() )
cluster_bgm_areas = {
    label: (raw_area - area_min)/(area_max - area_min + np.finfo( np.float32 ).eps)
    for label, raw_area in cluster_bgm_areas.items()
}
cluster_bgm_areas
point_bgm_areas = np.asarray( [cluster_bgm_areas[ label ] for label in cluster_labels] )
point_bgm_areas.shape
(5882,)
## Mixture density at every point: exponentiate the values returned by
## bgm.score_samples().
point_bgm_scores = np.exp( bgm.score_samples( emb_2d_earthquake_mini ) )
point_bgm_scores
array([0.00444008, 0.00091659, 0.00181243, ..., 0.00469691, 0.00414089,
0.00352442])
import numpy as np
from sklearn.preprocessing import QuantileTransformer
from pyecharts.charts import Timeline
def _scatter_rows( dates, urls, titles, points, extra = None ):
    """ Build one record per article for the scatter chart.

    Each record carries the tooltip fields (`timestamp`, `title`, `url`) and a
    `data` list `[x, y, unix_timestamp]`.  When `extra` is given, its values are
    appended to `data` as a 4th dimension for the VisualMap to colour by.
    The inputs are zipped, so the result is as long as the shortest of them.
    """
    columns = [dates, urls, titles, points]
    if extra is not None:
        columns.append( extra )
    rows = []
    for date, url, title, point, *rest in zip( *columns ):
        rows.append( {
            "timestamp": date,
            "title": title,
            "data": [point[ 0 ], point[ 1 ], date.to_pydatetime().timestamp(), *rest],
            "url": url
        } )
    return rows


def visualize(
    df,
    emb,
    visual_scale = None,
    cluster_labels = None,
    compacting_label = True,
    dot_size = 10,
    quantile_transform = False,
    opacity = 1,
    color_map = None
):
    """ Render the reduced embeddings as an interactive scatter chart and display it inline.

    The chart is written to `render.html` in the current directory and then
    shown with IPython's `display`.  Hovering a dot shows the article's
    timestamp, URL and title.  `df` is only read, never modified.

    Args:
        df:
            The dataframe to visualize; must contain the columns:
                date: timestamp
                title: the title of the news articles.
                url: the URL of the news articles.
        emb:
            A 2D array-like of the low dimensional embedding of the news
            articles, one `[x, y]` row per row of `df`.
        visual_scale:
            Array-like of numbers (converted to float) used to colour the dots
            through a continuous VisualMap.  Takes precedence over
            `cluster_labels` when both are given.
        cluster_labels:
            Integer-like cluster label per row, used to colour the dots when
            `visual_scale` is None.
        compacting_label:
            When True, cluster labels are renumbered 0, 1, 2, ... in order of
            first appearance before colouring.
        dot_size:
            Dot size in the chart.
        quantile_transform:
            When True, pass `emb` through a QuantileTransformer first to reduce
            the impact of outliers on the axis bounds.
        opacity:
            Forwarded to the VisualMap as `range_opacity`.
        color_map:
            Optional list of colour strings forwarded to the VisualMap as
            `range_color`.

    Raises:
        ValueError: if `emb` and `df` do not have the same number of rows.

    Note the order of all data rows must match.
    """
    ## Imported explicitly so the function does not depend on `display` being
    ## injected as a builtin by the notebook front end.
    from IPython.display import display

    ## Normalize the bounds of the dots to reduce the impact of outliers.
    if quantile_transform:
        print( "Quantile transform...")
        qt = QuantileTransformer( n_quantiles = 200, random_state=0 )
        embeddings_norm = qt.fit_transform( emb )
    else:
        embeddings_norm = emb

    ## Work on plain lists instead of writing an "emb" column into `df`: the
    ## previous implementation mutated the caller's DataFrame on every call.
    points = np.asarray( embeddings_norm ).tolist()
    if len( points ) != len( df ):
        raise ValueError(
            "emb has %d rows but df has %d rows" % (len( points ), len( df ))
        )
    dates = list( df[ "date" ] )
    urls = list( df[ "url" ] )
    titles = list( df[ "title" ] )

    ## Index, inside each record's "data" list, of the value the VisualMap colours by.
    DIMENSION_CLUSTER_INDEX = 3
    ## VisualMap settings shared by the two colouring modes.
    visualmap_common = dict(
        type_ = 'color',
        pos_top = 'middle',
        pos_left = 10,
        dimension = DIMENSION_CLUSTER_INDEX,
        range_color = color_map,
        range_opacity = opacity,
        item_width = 30,
        item_height = 400,
    )

    VISUALMAP_OPS = None
    if visual_scale is not None:
        print( "Processing visual scale...")
        ## Accept lists / Series as well as arrays, as the docstring promises.
        visual_scale = np.asarray( visual_scale, dtype = float )
        visual_data = _scatter_rows( dates, urls, titles, points, visual_scale )
        print( "Visual scale range:", visual_scale.min(), visual_scale.max() )
        VISUALMAP_OPS = opts.VisualMapOpts(
            min_ = visual_scale.min(),
            max_ = visual_scale.max(),
            precision = 4,
            **visualmap_common
        )
    elif cluster_labels is not None:
        print( "Processing cluster labels...")
        ## Convert cluster labels to int (no-op for labels that already are).
        cluster_labels = [int( label ) for label in cluster_labels]
        if compacting_label:
            ## Compactize cluster labels to 0,1,2,... in order of first appearance.
            label_mapping = {}
            for label in cluster_labels:
                label_mapping.setdefault( label, len( label_mapping ) )
            print( "Compact label mappings:", label_mapping )
            cluster_labels_compact = [label_mapping[ label ] for label in cluster_labels]
        else:
            cluster_labels_compact = cluster_labels
        visual_data = _scatter_rows( dates, urls, titles, points, cluster_labels_compact )
        ## Largest label value; it is the upper bound of the colour scale.
        CLUSTER_COUNT = max( cluster_labels_compact, default = 0 )
        print( "CLUSTER_COUNT:", CLUSTER_COUNT )
        VISUALMAP_OPS = opts.VisualMapOpts(
            min_ = 0,
            max_ = CLUSTER_COUNT,
            **visualmap_common
        )
    else:
        visual_data = _scatter_rows( dates, urls, titles, points )

    x_data = [{"value": item[ "data" ]} for item in visual_data]
    y_data = [
        {"timestamp": item[ "timestamp" ], "url": item[ "url" ], "title": item[ "title" ], "value": item[ "data" ]}
        for item in visual_data
    ]

    print( "Rendering...")
    current_scatter = (
        Scatter(
            init_opts=opts.InitOpts(
                width="1200px", height="800px",
                animation_opts=opts.AnimationOpts(
                    animation=False,
                    animation_threshold = 1
                )
            )
        )
        .add_xaxis(
            xaxis_data = x_data
        )
        .add_yaxis(
            series_name = "News Embeddings Clustering",
            y_axis = y_data,
            symbol_size = dot_size,
            label_opts=opts.LabelOpts(is_show=False),
            emphasis_opts = opts.EmphasisOpts(
                focus = "none" if visual_scale is not None else "self",
                itemstyle_opts = opts.ItemStyleOpts(
                    border_type = "dashed",
                    # opacity = 1.0,
                    border_width = 100,
                ),
            )
        )
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(
                type_="value",
                splitline_opts = opts.SplitLineOpts(is_show=True)
            ),
            yaxis_opts=opts.AxisOpts(
                type_="value",
                axistick_opts = opts.AxisTickOpts(is_show=True),
                splitline_opts = opts.SplitLineOpts(is_show=True),
            ),
            tooltip_opts=opts.TooltipOpts(
                position = ['15%', '0%'],
                ## The expression starts on the `return` line: a line break
                ## right after `return` makes JavaScript return undefined
                ## (automatic semicolon insertion) unless the library happens
                ## to strip the newline first.
                formatter=JsCode(
                    """function (params) {
                        return params.data.timestamp + '<br>' +
                            params.data.url + '<br>' +
                            params.data.title;
                    }"""
                )
            ),
            visualmap_opts = VISUALMAP_OPS,
            datazoom_opts = [
                opts.DataZoomOpts( range_start = 0, range_end = 100, filter_mode = "none", orient = 'horizontal' ),
                opts.DataZoomOpts( range_start = 0, range_end = 100, filter_mode = "none", orient = 'vertical' ),
                opts.DataZoomOpts( range_start = 0, range_end = 100, filter_mode = "none", type_ = "inside", orient = 'horizontal' ),
                opts.DataZoomOpts( range_start = 0, range_end = 100, filter_mode = "none", type_ = "inside", orient = 'vertical' )
            ],
        )
    )
    ## Render to an explicit path so the file written and the file displayed
    ## are guaranteed to be the same one.
    html_path = 'render.html'
    current_scatter.render( html_path )
    display( HTML(filename=html_path) )
## Colour each point by its cluster label, using the sampled categorical palette (cm_hex).
visualize( df_earthquake_mini, emb_2d_earthquake_mini, cluster_labels = cluster_labels, compacting_label = False, dot_size = 2, quantile_transform=False, color_map = cm_hex )
Processing cluster labels... CLUSTER_COUNT: 147 Rendering...
## Colour each point by its cluster's peak 12-hour article count (point_freq).
visualize(
df_earthquake_mini,
emb_2d_earthquake_mini,
visual_scale = point_freq,
compacting_label = False,
dot_size = 2,
# opacity = 0.9,
color_map = None,
quantile_transform=False
)
Processing visual scale... Visual scale range: 2.0 66.0 Rendering...
# Gaussian Mixture Density: colour each point by np.exp( bgm.score_samples(...) ) at its location.
visualize(
df_earthquake_mini,
emb_2d_earthquake_mini,
visual_scale = point_bgm_scores,
compacting_label = False,
dot_size = 2,
# opacity = 0.9,
color_map = None,
quantile_transform=False
)
Processing visual scale... Visual scale range: 3.5933124676558116e-05 0.006599493377467835 Rendering...
## Number of distinct publishing domains per cluster.  The domain is whatever
## follows "//" in the URL up to the next "/".
url_domains = df_earthquake_mini[ "url" ].str.extract( "//([^/]+)", expand = False )
cluster_domain_map = {}
for cls in set( cluster_labels ):
    cluster_domain_map[ cls ] = len( url_domains[ cluster_labels == cls ].unique() )
## Broadcast the per-cluster count back onto every point.
map_to_dots_domain = np.asarray( [cluster_domain_map[ label ] for label in cluster_labels] )
## Inspect a single cluster: list the distinct domains of its articles.
cls = 1
cluster_mask = cluster_labels == cls
df_filtered = df_earthquake_mini.loc[ cluster_mask ].assign(
    domain = lambda frame: frame[ "url" ].str.extract( "//([^/]+)", expand = False )
)
out = df_filtered[ "domain" ].unique()
out
array(['www.abc.net.au', 'www.torontotelegraph.com',
'www.businesswireindia.com', 'dailypost.vu', '925rocks.iheart.com',
'www.nbcmiami.com', 'www.vancourier.com', 'www.zawya.com',
'economictimes.indiatimes.com', 'www.finanznachrichten.de',
'www.coinspeaker.com', 'www.stuff.co.nz', 'www.msn.com',
'www.benzinga.com', 'techcrunch.com', 'en.mercopress.com',
'www.indiatvnews.com', 'oklahoman.com', 'techcentral.co.za',
'www.kmjnow.com', 'www.ksat.com', 'www.digitaljournal.com',
'www.bbc.com', 'www.afr.com', 'www.morningstar.com',
'www.nbcconnecticut.com', 'www.sfchronicle.com',
'www.lakeexpo.com', 'www.fool.com', 'www.businessinsider.com',
'www.thestandard.com.hk', 'businessrecord.com', 'sbr.com.sg',
'allafrica.com', 'www.waateanews.com', 'www.propertyxpress.com',
'www.marketscreener.com', 'www.heraldscotland.com',
'www.khaleejtimes.com', 'www.680news.com', 'www.wfmz.com',
'www.swfinstitute.org', 'www.investordaily.com.au',
'www.casinonewsdaily.com', 'www.standardmedia.co.ke', 'skift.com',
'www.jamaicaobserver.com', 'www.prnewswire.com:443',
'961srs.iheart.com', 'www.princegeorgecitizen.com',
'www.pressreleasepoint.com', 'www.colombopage.com',
'www.coindesk.com', 'www.burnabynow.com', 'www.siliconindia.com'],
dtype=object)
# ## Mean density per cluster
## Average mixture density (point_bgm_scores) over the points of each cluster.
## A minimum cluster size (e.g. >= 10 points, density 0 otherwise) was
## considered here but is not applied.
cluster_density_map = {
    cls: np.mean( point_bgm_scores[ cluster_labels == cls ] )
    for cls in set( cluster_labels )
}
## Same values as a plain list, in cluster iteration order.
densities = list( cluster_density_map.values() )
## Broadcast the per-cluster density back onto every point.
map_to_dots = np.asarray( [cluster_density_map[ label ] for label in cluster_labels] )
## Per cluster: (mean density, distinct-domain count, final score), where the
## final score is sqrt(mean density) * domain count.
cluster_density_domain_map = {
    key: (
        cluster_density_map[ key ],
        n_domains,
        np.power( cluster_density_map[ key ], 0.5 ) * n_domains
    )
    for key, n_domains in cluster_domain_map.items()
}
cluster_density_domain_map
## Final score of the cluster each point belongs to.
map_final_score_to_dots = np.asarray( [cluster_density_domain_map[ label ][ 2 ] for label in cluster_labels] )
## Flat (cluster, density, domains, score) rows, best score first.
sorted_cluster_density = sorted(
    ((key, *stats) for key, stats in cluster_density_domain_map.items()),
    key = lambda entry: entry[ 3 ],
    reverse = True
)
sorted_cluster_density
# Test on Turkey.
# Where is the earthquake cluster ranked, and how can we move it up to the top?
[(12, 0.003905591681397265, 254, 15.873662240233852), (16, 0.0036356377958458915, 213, 12.843101306138337), (110, 0.0032831070199952593, 178, 10.1991157862596), (90, 0.004479526865880605, 139, 9.303168200977513), (17, 0.0027056412445230138, 169, 8.790666617772613), (62, 0.0037842857754543284, 139, 8.550800282286628), (42, 0.002302072291585435, 166, 7.9646659733430285), (76, 0.0022110739231865567, 157, 7.3824630803428635), (121, 0.002946445840515112, 133, 7.219396129377569), (145, 0.002700588241003613, 136, 7.06753706078736), (35, 0.002762798803640092, 134, 7.043352562392535), (28, 0.002361505814791595, 133, 6.463178502706585), (81, 0.002224873754590205, 137, 6.462093739640702), (49, 0.0021592721607710127, 138, 6.412579748410398), (77, 0.0018133638146330612, 132, 5.621036479704295), (20, 0.001685969808028291, 131, 5.378933711766069), (13, 0.0023391792567711133, 109, 5.271791796884395), (21, 0.0017668712983489043, 111, 4.665792672950316), (15, 0.0022759507361885295, 94, 4.484450992592276), (96, 0.002121909102066681, 96, 4.422161720770345), (56, 0.0018297270901107307, 95, 4.063654388386068), (50, 0.0013327573415579132, 104, 3.7967227191737862), (45, 0.0015704107732625206, 88, 3.487299962455906), (79, 0.0018897769381036916, 78, 3.390782047171841), (82, 0.0011965332222481483, 80, 2.767275306576516), (22, 0.0014152112882460815, 71, 2.6709698807827276), (146, 0.001593495563471577, 64, 2.554791151538532), (111, 0.00101942128470995, 80, 2.554270193645081), (88, 0.00102710253908765, 78, 2.4997783597369714), (37, 0.001020064519630394, 78, 2.4911990160224686), (123, 0.0012137000105467053, 70, 2.4386738305232325), (46, 0.0010899033489962329, 66, 2.178903161737022), (24, 0.001180413921839534, 61, 2.0957862971125913), (68, 0.001500849714105757, 54, 2.092003290229819), (10, 0.0012948751864884623, 57, 2.051109329338886), (55, 0.0009927084675114507, 65, 2.047972967408476), (1, 0.0013093176988448513, 55, 1.9901472405341458), (120, 0.0014395235418003427, 49, 1.8591116222170803), (36, 
0.0014191192743387806, 48, 1.8082175776373124), (147, 0.0010621788504994291, 52, 1.6947364431528746), (30, 0.000980528039391178, 54, 1.6909227548485695), (106, 0.0008550704228978699, 51, 1.491320948004607), (14, 0.0016280141561603064, 30, 1.2104597228095926), (54, 0.0007411263723534417, 39, 1.0617218149541738), (78, 0.0006495905103412968, 40, 1.019482621993173), (41, 0.0012276734592535402, 29, 1.0161069723371783), (29, 0.0006969230134927307, 31, 0.8183782841489101), (127, 0.0006662723651905451, 22, 0.5678695490623034), (32, 0.0006509273516756337, 19, 0.48475228102083623), (11, 0.002536233839814704, 8, 0.40288827948718126), (7, 0.0004501614784778442, 15, 0.31825513767654234), (73, 0.00031526952382958146, 17, 0.30184912189163154), (134, 0.0005928398362242693, 12, 0.2921796303924946), (5, 0.0005306746312470178, 7, 0.16125463382831476), (91, 0.00015759591058832386, 9, 0.11298348887184459), (115, 0.00015519734888567638, 9, 0.11212040518897436), (141, 0.00019114652958890322, 8, 0.11060460159364892), (25, 0.0002965602138567455, 6, 0.10332554233510141), (64, 0.0001469155061693345, 7, 0.08484609479697572), (2, 0.00013904269052574715, 7, 0.0825414552558992), (67, 0.00037341354308348867, 3, 0.05797173352377345), (31, 0.0003188856611410175, 2, 0.03571473987815213), (131, 6.90155733711952e-05, 3, 0.024922683650457002)]
# Unique-domain count per cluster visualization (map_to_dots_domain), not the final score.
visualize(
df_earthquake_mini,
emb_2d_earthquake_mini,
visual_scale = map_to_dots_domain,
compacting_label = False,
dot_size = 2,
# opacity = 0.9,
color_map = None,
quantile_transform=False
)
Processing visual scale... Visual scale range: 2.0 254.0 Rendering...
# Final score visualization per point: the point's assignment probability times its cluster's final score.
visualize(
df_earthquake_mini,
emb_2d_earthquake_mini,
visual_scale = point_bgm_probs*map_final_score_to_dots,
compacting_label = False,
dot_size = 2,
# opacity = 0.9,
color_map = None,
quantile_transform=False
)
Processing visual scale... Visual scale range: 0.013800648007207228 15.216635398483401 Rendering...
[row[0] for row in sorted_cluster_density[:10]]
[12, 16, 110, 90, 17, 62, 42, 76, 121, 145]
(point_bgm_probs*map_final_score_to_dots).shape
(5882,)
point_bgm_probs.shape
(5882,)
#Ranks the points within clusters by probability
def rank_points_by_prob(cluster_labels, point_bgm_probs):
    """ Rank the points of every cluster by decreasing probability.

    Args:
        cluster_labels:
            Array-like with one cluster label per point.
        point_bgm_probs:
            Array-like with one probability per point, in the same order.

    Returns:
        dict mapping each distinct label to a 1-D integer array holding the
        positions of that cluster's points, most probable first.  Points with
        equal probability keep their original relative order.  Empty input
        gives an empty dict.
    """
    ## Coerce to arrays: with plain lists `cluster_labels == cls` is a single
    ## False, which silently produced an empty ranking for every cluster.
    labels = np.asarray( cluster_labels )
    probs = np.asarray( point_bgm_probs, dtype = float )
    ranked_points = {}
    for cls in np.unique( labels ):
        members = np.flatnonzero( labels == cls )
        ## Stable sort so that ties are ordered deterministically.
        order = np.argsort( -probs[ members ], kind = "stable" )
        ranked_points[ cls ] = members[ order ]
    return ranked_points
ranked_points = rank_points_by_prob( cluster_labels, point_bgm_probs )
## Labels of the ten best-scoring clusters (first field of each sorted row).
top_clusters = [cluster_id for cluster_id, *_ in sorted_cluster_density[ :10 ]]
## For each of them, the rows of its ten most probable articles.
top_articles = {
    cls: df_earthquake_mini.iloc[ ranked_points[ cls ][ :10 ] ]
    for cls in top_clusters
}
# Print the title and URL of every retrieved article, one section per cluster.
# NOTE(review): the original indentation was lost; the bare print() is assumed
# to run once per cluster (blank line after each section) -- confirm.
for rank, (cls, articles) in enumerate(top_articles.items(), start=1):
    print(f"Top 10 articles for nr {rank} cluster-{cls}:")
    for position, (_, row) in enumerate(articles.iterrows(), start=1):
        title = row['title']
        url = row['url']
        print(f"\t{position}-th Title: {title}")
        print(f"\t{position}-th URL: {url}")
        print("\t----")
    print()
Top 10 articles for nr 1 cluster-12: 1-th Title: Subsidised healthcare plan for Hong Kong citizens in Mainland China 1-th URL: https://www.imtj.com/news/subsidised-healthcare-plan-hong-kong-citizens-mainland-china/ ---- 2-th Title: Distancing scheme in greater Seoul raised to Level 1.5 amid virus resurgence 2-th URL: http://www.koreaherald.com/view.php?ud=20201117000098 ---- 3-th Title: Lagos warns religious leaders against second COVID-19 wave 3-th URL: https://punchng.com/lagos-warns-religious-leaders-against-second-covid-19-wave/ ---- 4-th Title: Iran registers record daily rise in coronavirus cases, deaths 4-th URL: https://www.livemint.com/news/world/iran-registers-record-daily-rise-in-coronavirus-cases-deaths-11605532726078.html ---- 5-th Title: Michigan governor says she has authority for stay-home order 5-th URL: https://www.thestar.com/news/world/us/2020/11/16/michigan-governor-says-she-has-authority-for-stay-home-order.html ---- 6-th Title: Inter-island travel visa hits final stage 6-th URL: http://www.tribune242.com/news/2020/nov/17/inter-island-travel-visa-hits-final-stage/ ---- 7-th Title: NewsNow: Scotland news | Breaking News & Search 24/7 7-th URL: https://www.newsnow.co.uk/h/UK/Scotland ---- 8-th Title: thebahamasweekly.com - New COVID-19 Initiatives to curb rise in Family Island Cases 8-th URL: http://www.thebahamasweekly.com/publish/bis-news-updates/New_COVID-19_Initiatives_to_curb_rise_in_Family_Island_Cases67252.shtml ---- 9-th Title: Asia Today: New Zealand imposes new mask rules as precaution 9-th URL: https://www.washingtontimes.com/news/2020/nov/15/asia-today-new-zealand-imposes-new-mask-rules-as-p/ ---- 10-th Title: Whitmer claims authority for stay-home order | News, Sports, Jobs 10-th URL: https://www.ironmountaindailynews.com/news/local-news/2020/11/whitmer-claims-authority-for-stay-home-order/ ---- Top 10 articles for nr 2 cluster-16: 1-th Title: Ex-felon arrested after standoff in Fernley; no one hurt 1-th URL: 
https://www.chron.com/news/article/Ex-felon-arrested-after-standoff-in-Fernley-no-15734362.php ---- 2-th Title: Brown County Sheriff's office searches for escaped inmate 2-th URL: https://www.wlwt.com/article/brown-county-sheriffs-office-searches-for-escaped-inmate/34712425 ---- 3-th Title: North Platte man arrested after scuffle with a trooper at the Elm Creek truck stop 3-th URL: https://kearneyhub.com/news/state-and-regional/north-platte-man-arrested-after-scuffle-with-a-trooper-at-the-elm-creek-truck-stop/article_8b77b960-283c-11eb-afca-6b78ac60112a.html ---- 4-th Title: Break-in at Exeter Industrial Park 4-th URL: https://www.mycariboonow.com/67285/break-in-at-exeter-industrial-park/ ---- 5-th Title: Husband of 'Tiger King' star Joe Exotic arrested in Travis County on DWI charge 5-th URL: https://oklahoman.com/article/5676367/husband-of-tiger-king-star-joe-exotic-arrested-in-travis-county-on-dwi-charge ---- 6-th Title: Cadillac Man Arrested After Fleeing From Police in September 6-th URL: https://www.9and10news.com/2020/11/16/cadillac-man-arrested-after-fleeing-from-police-in-september/ ---- 7-th Title: Two sought in Georgetown-area thefts 7-th URL: https://www.capegazette.com/article/two-sought-georgetown-area-thefts/211683 ---- 8-th Title: Man Who Pointed Shotgun At National Grid Employees Charged | News, Sports, Jobs 8-th URL: https://www.post-journal.com/news/latest-news/2020/11/man-who-pointed-shotgun-at-national-grid-employees-charged/ ---- 9-th Title: Gloversville stop yields $63,000 in drugs, cash 9-th URL: https://www.timesunion.com/news/article/Gloversville-stop-yields-63-000-in-drugs-cash-15734225.php ---- 10-th Title: Suspect in Fairfield shooting case appears in court 10-th URL: https://www.dailyrepublic.com/all-dr-news/solano-news/fairfield/suspect-in-fairfield-shooting-case-appears-in-court/ ---- Top 10 articles for nr 3 cluster-110: 1-th Title: SOS Forum • Quick request for help - driving me nuts! 
1-th URL: https://www.soundonsound.com/forum/viewtopic.php?f=16&t=75042 ---- 2-th Title: Dhanush, Sai Pallavi's Rowdy Baby garners 1 billion views on YouTube | Entertainment News,The Indian Express 2-th URL: https://indianexpress.com/article/entertainment/tamil/dhanush-sai-pallavis-rowdy-baby-garners-1-billion-views-on-youtube-7053563/ ---- 3-th Title: Darshan Raval Songs Download: Darshan Raval New MP3 Songs Free Online on Gaana.com 3-th URL: https://gaana.com/artist/darshan-raval ---- 4-th Title: Glamorous pictures of Nepali beauty Aditi Budhathoki make heads turn 4-th URL: https://photogallery.indiatimes.com/celebs/celeb-themes/glamorous-pictures-of-nepali-beauty-aditi-budhathoki-make-heads-turn/articleshow/71277482.cms ---- 5-th Title: There Are Reportedly Two Different Contract Types In WWE 5-th URL: https://411mania.com/wrestling/there-are-reportedly-two-different-contract-types-in-wwe/ ---- 6-th Title: SOS Forum • Quick request for help - driving me nuts! 6-th URL: https://www.soundonsound.com/forum/viewtopic.php?f=16&t=75042&view=unread ---- 7-th Title: Exclusive! 
Amaal Mallik: 'Tu Mera Nahi' is meant to send a message about acceptance and moving on | Hindi Movie News 7-th URL: https://timesofindia.indiatimes.com/entertainment/hindi/music/news/exclusive-amaal-mallik-tu-mera-nahi-is-meant-to-send-a-message-about-acceptance-and-moving-on/articleshow/79278851.cms ---- 8-th Title: Report - Two Different Types Of Contracts In WWE - eWrestlingNews.com 8-th URL: https://www.ewrestlingnews.com/news/report-two-different-types-of-contracts-in-wwe ---- 9-th Title: Kangana Ranaut celebrates nephew Prithviraj's birthday in style with family as he turns 3; see pictures | Hindi Movie News 9-th URL: https://timesofindia.indiatimes.com/entertainment/hindi/bollywood/news/kangana-ranaut-celebrates-nephew-prithvirajs-birthday-in-style-with-family-as-he-turns-3-see-pictures/articleshow/79243084.cms ---- 10-th Title: When Funny meme called Ranveer as 'motichoor laddoo' and Deepika as 'gajar ka halwa' 10-th URL: https://www.cinetalkers.com/when-funny-meme-called-ranveer-as-motichoor-laddoo-and-deepika-as-gajar-ka-halwa/ ---- Top 10 articles for nr 4 cluster-90: 1-th Title: COVID-19 vaccine in Alabama will be free 1-th URL: https://www.al.com/news/2020/11/covid-19-vaccine-in-alabama-will-be-free-first-doses-could-be-available-by-december.html ---- 2-th Title: COVID-19 WRAP | SA's Covid-19 cases jump by 2,888 in a day & cost of Covax estimated at R4.8bn 2-th URL: https://www.timeslive.co.za/news/south-africa/2020-11-18-covid-19-live-updates-china-discovers-covid-19-on-imported-food-reports-suggest-it-may-have-come-from-other-countries/ ---- 3-th Title: Pfizer launches Immunization Pilot Program for its COVID-19 Vaccine 3-th URL: https://www.industryleadersmagazine.com/pfizer-launches-immunization-pilot-program-for-its-covid-19-vaccine/ ---- 4-th Title: PFE Stock Down 2% Now, Pfizer Launches COVID-19 Vaccine Delivery Trial in US 4-th URL: https://www.coinspeaker.com/pfizer-covid-19-vaccine-delivery-trial/ ---- 5-th Title: Who Will Be the First to Get 
COVID-19 Vaccines? 5-th URL: https://www.kmaj1440.com/news/who-will-be-the-first-to-get-covid-19-vaccines/ ---- 6-th Title: City of LA COVID Test Sites Prepare to Give COVID Vaccines in Early 2021 6-th URL: https://www.msn.com/en-us/health/medical/city-of-la-covid-test-sites-prepare-to-give-covid-vaccines-in-early-2021/ar-BB1b7fZm ---- 7-th Title: What's Next for the Pfizer and Moderna COVID-19 Vaccines 7-th URL: https://www.wftw.com/news/whats-next-for-the-pfizer-and-moderna-covid-19-vaccines/ ---- 8-th Title: NHS 'working incredibly hard' to ensure it is ready to deploy Covid vaccine 8-th URL: https://www.glasgowtimes.co.uk/news/viralnews/18880452.nhs-working-incredibly-hard-ensure-ready-deploy-covid-vaccine/ ---- 9-th Title: Edelstein: No deal to allow virus vaccination for all Israelis 9-th URL: https://www.ynetnews.com/health_science/article/By3pIbZcD ---- 10-th Title: Coronavirus vaccine could be made mandatory after Matt Hancock refuses to rule it out 10-th URL: https://www.express.co.uk/news/uk/1360927/coronavirus-vaccine-mandatory-compulsory-matt-hancock-covid-19-news ---- Top 10 articles for nr 5 cluster-17: 1-th Title: TEEX Selects IBM to Extend Its Cyber Readiness Center Capabilities 1-th URL: https://www.hostreview.com/news/201116-teex-selects-ibm-to-extend-its-cyber-readiness-center-capabilities ---- 2-th Title: U.S. 
Insurtech TrustLayer Joins MarshBerry's Connect Platform 2-th URL: https://www.crowdfundinsider.com/2020/11/169173-u-s-insurtech-trustlayer-joins-marshberrys-connect-platform/ ---- 3-th Title: Lummus Technology and TCG Digital form joint digital venture 3-th URL: https://www.hydrocarbonprocessing.com/news/2020/11/lummus-technology-and-tcg-digital-form-joint-digital-venture ---- 4-th Title: Apple, ChargePoint team up on electric vehicle charging info 4-th URL: https://www.investing.com/news/stock-market-news/apple-chargepoint-team-up-on-electric-vehicle-charging-info-2349829 ---- 5-th Title: Accenture Makes Strategic Investment in TripleBlind to Bolster Data Privacy and Increase Data Collaboration Opportunities 5-th URL: https://www.businesswire.com/news/home/20201118005370/en/Accenture-Makes-Strategic-Investment-in-TripleBlind-to-Bolster-Data-Privacy-and-Increase-Data-Collaboration-Opportunities ---- 6-th Title: Junior Supply Chain Analyst - Montvale, New Jersey - 5000662446006 6-th URL: http://www.latpro.com/jobs/3761879.html ---- 7-th Title: SASE Specialist Cato Networks, Valued At $1B, Secures Record Funding Round 7-th URL: https://www.crn.com/news/networking/sase-specialist-cato-networks-valued-at-1b-secures-record-funding-round ---- 8-th Title: ICE Mortgage Technology Announces Encompass 20.2 Major Release 8-th URL: https://www.prnewswire.com:443/news-releases/ice-mortgage-technology-announces-encompass-20-2-major-release-301173469.html ---- 9-th Title: INFORMATION MANAGEMENT (IM)OFFICER 9-th URL: https://reliefweb.int/job/3687997/information-management-imofficer ---- 10-th Title: Newswire & Press Release / Daron Group Launches Infor Solution to Help Harmonize Operations in Africa - Software - Infor 10-th URL: https://www.newswiretoday.com/news/174145/Daron-Group-Launches-Infor-Solution-to-Help-Harmonize-Operations-in-Africa/ ---- Top 10 articles for nr 6 cluster-62: 1-th Title: Breaking: Bitcoin breaks above $17,000 first time since January 2018; now the sky 
is the limit 1-th URL: https://www.fxstreet.com/cryptocurrencies/news/breaking-bitcoin-breaks-above-17-000-first-time-since-january-2018-now-the-sky-is-the-limit-202011171159 ---- 2-th Title: Dow sets intraday record, TSX hits nine-month high on reaction to COVID-19 vaccine 2-th URL: https://www.ctvnews.ca/business/dow-sets-intraday-record-tsx-hits-nine-month-high-on-reaction-to-covid-19-vaccine-1.5190302 ---- 3-th Title: Gold price today: Yellow metal slips in red, likely to get support around Rs 50,500-50,300 3-th URL: https://www.moneycontrol.com/news/business/markets/gold-price-today-yellow-metal-slips-in-red-likely-to-get-support-around-rs-50500-50300-6130321.html ---- 4-th Title: Bitcoin Price Prediction: BTC Could Hit $318,000 By 2021, Citibank Analyst Says 4-th URL: https://www.ibtimes.com/bitcoin-price-prediction-btc-could-hit-318000-2021-citibank-analyst-says-3083618 ---- 5-th Title: Seoul stocks open flat on valuation pressure 5-th URL: http://www.koreaherald.com/view.php?ud=20201117000161 ---- 6-th Title: Gold Manipulation & Gold Salvation – Investment Watch 6-th URL: https://www.investmentwatchblog.com/gold-manipulation-gold-salvation/ ---- 7-th Title: French shares down 0.21 pct Tuesday 7-th URL: https://www.thestar.com.my/news/world/2020/11/18/french-shares-down-021-pct-tuesday ---- 8-th Title: Market exchange rates in China -- Nov. 17 8-th URL: https://www.mexicostar.com/news/267004033/market-exchange-rates-in-china----nov-17 ---- 9-th Title: U.S. 
stocks rally on vaccine news - China.org.cn 9-th URL: http://www.china.org.cn/world/Off_the_Wire/2020-11/17/content_76916855.htm ---- 10-th Title: Sellers meet their mixed as stock markets in Asia close mixed 10-th URL: https://www.nashvilleherald.com/news/267004677/sellers-meet-their-mixed-as-stock-markets-in-asia-close-mixed ---- Top 10 articles for nr 7 cluster-42: 1-th Title: Saints QB Brees set for MRI, X-ray on injured ribs 1-th URL: https://www.theusnews.com/news/266994577/saints-qb-brees-set-for-mri-x-ray-on-injured-ribs ---- 2-th Title: Philadelphia Eagles at New York Giants 11/15/2020 2-th URL: https://pro32.ap.org/photo-gallery/philadelphia-eagles-new-york-giants-11152020 ---- 3-th Title: Saints Down 49ers to Extend Win Streak 3-th URL: https://classicrock1051.com/saints-down-49ers-to-extend-win-streak/ ---- 4-th Title: Locals in the pros: Taysom Hill's playing time could increase with Brees injury 4-th URL: https://www.idahostatejournal.com/preps/schools/highland/locals-in-the-pros-taysom-hills-playing-time-could-increase-with-brees-injury/article_940c7170-83f6-5e20-914e-84cfa3234386.html ---- 5-th Title: NFL roundup: Cardinals win on last-second Hail Mary 5-th URL: https://www.ohiostandard.com/news/266994810/nfl-roundup-cardinals-win-on-last-second-hail-mary ---- 6-th Title: Saints QB Drew Brees, 41, 'suffers broken ribs and a collapsed lung' 6-th URL: https://www.dailymail.co.uk/news/article-8955259/Report-Brees-broken-ribs-collapsed-lung.html ---- 7-th Title: Chicago Bears quarterback Nick Foles carted off field with injury 7-th URL: https://www.104thehawk.com/news/chicago-bears-quarterback-nick-foles-carted-off-field-with-injury/ ---- 8-th Title: Colin Cowherd Describes Every Week 10 NFL Game in Three Words 8-th URL: https://foxsports1070.iheart.com/content/2020-11-16-colin-cowherd-describes-every-week-10-nfl-game-in-three-words/ ---- 9-th Title: Saints turn to Winston after Brees injures ribs 9-th URL: 
https://www.953thescore.com/news/saints-turn-to-winston-after-brees-injures-ribs/ ---- 10-th Title: Snap Count Analysis | Who Is the Only Jet to Participate in Every Snap on Offense Through Nine Weeks? 10-th URL: https://www.newyorkjets.com/news/snap-count-analysis-who-is-the-only-jet-to-participate-in-every-snap-on-offense- ---- Top 10 articles for nr 8 cluster-76: 1-th Title: Ask the Weather Guys:How is Wisconsin winter weather affected by La Niña? 1-th URL: https://chippewa.com/news/state-and-regional/ask-the-weather-guys-how-is-wisconsin-winter-weather-affected-by-la-ni-a/article_95be5098-7686-51db-8b87-b64d0f22827a.html ---- 2-th Title: Kenya: Motorists Stranded, Residents Displaced By Floods in Homa Bay 2-th URL: https://allafrica.com/stories/202011160932.html ---- 3-th Title: Kenya: Motorists Stranded, Residents Displaced By Floods in Homa Bay 3-th URL: https://allafrica.com/stories/202011160932.html ---- 4-th Title: Milder on Monday, snow favors northern Minnesota 4-th URL: https://www.mprnews.org/story/2020/11/15/milder-on-monday-snow-favors-northern-minnesota ---- 5-th Title: What happened to the rest of the lights on the Sunshine Skyway? 5-th URL: https://www.tampabay.com/news/florida/2020/11/17/what-happened-to-the-rest-of-the-lights-on-the-sunshine-skyway/ ---- 6-th Title: Cool morning on Tuesday 6-th URL: https://www.wesh.com/article/cool-morning-on-tuesday/34693408 ---- 7-th Title: Weather Forecast: Scattered rain showers stick around Monday 7-th URL: https://www.kpax.com/weather/weather-forecast-scattered-rain-showers-stick-around-monday ---- 8-th Title: What a scorcher! 
Sydney sizzles through 40C as thousands dodge work to flock to the beach 8-th URL: https://www.dailymail.co.uk/news/article-8952697/What-scorcher-Sydney-sizzles-40C-thousands-dodge-work-flock-beach.html ---- 9-th Title: WA Seattle WA Zone Forecast 9-th URL: https://www.sfgate.com/weather/article/WA-Seattle-WA-Zone-Forecast-15732654.php ---- 10-th Title: Forecasters Say Rough Mountain Travel, Flooding Possible in Coming CA Storm 10-th URL: https://kfbk.iheart.com/content/2020-11-17-forecasters-say-rough-mountain-travel-flooding-possible-in-coming-ca-storm/ ---- Top 10 articles for nr 9 cluster-121: 1-th Title: Potential presidential recount expected to cost La Crosse County $63,250 1-th URL: https://lacrossetribune.com/community/potential-presidential-recount-expected-to-cost-la-crosse-county-63-250/article_3d5f9d2d-2cb6-500b-a64a-f36ee757d9f6.html ---- 2-th Title: Trump Pays $3 Million for Partial Recount in Wisconsin 2-th URL: https://www.1490wosh.com/news/trump-pays-3-million-for-partial-recount-in-wisconsin/ ---- 3-th Title: Trump campaign to seek partial recount in Wisconsin 3-th URL: https://www.newstalk1030.com/news/trump-campaign-to-seek-partial-recount-in-wisconsin/ ---- 4-th Title: Trump faces approaching deadline for recount in Wisconsin 4-th URL: https://www.wdtimes.com/news/national/article_c5247beb-641d-59ae-adfa-3dd530c4f2c3.html ---- 5-th Title: EXPLAINER: Why AP called North Carolina for Trump 5-th URL: https://apnews.com/article/why-ap-has-not-called-north-carolina-e07a1022a90ef31ca7c75d32eea1849b ---- 6-th Title: Georgia Audit Complete, No Fraud Found, Secretary 'Not Surprised' 6-th URL: https://dayton.binnews.com/content/2020-11-17-georgia-audit-complete-no-fraud-found-secretary-not-surprised/ ---- 7-th Title: Trump faces approaching deadline for recount in Wisconsin 7-th URL: https://www.greenwichtime.com/news/article/Trump-faces-approaching-deadline-for-recount-in-15733147.php ---- 8-th Title: Georgia Recount Finds 2,600 Uncounted Ballots in 
Floyd County 8-th URL: https://www.breitbart.com/2020-election/2020/11/16/georgia-recount-finds-2600-uncounted-ballots-in-floyd-county/ ---- 9-th Title: Bribes kickbacks? | Citizen WElls 9-th URL: https://citizenwells.com/tag/bribes-kickbacks/ ---- 10-th Title: Second Georgia county finds previously uncounted votes 10-th URL: https://www.westport-news.com/news/article/Georgia-official-No-sign-of-election-machine-15734080.php ---- Top 10 articles for nr 10 cluster-145: 1-th Title: Stephen Thompson 1-th URL: https://www.kuaf.com/people/stephen-thompson ---- 2-th Title: Wolfgang Van Halen Reveals Solo Single, 'Distance,' For His Late-Father 2-th URL: https://1051rocks.iheart.com/content/2020-11-16-wolfgang-van-halen-reveals-solo-single-distance-for-his-late-father/ ---- 3-th Title: Wolfgang Van Halen wrote a song in memory of dad Eddie Van Halen 3-th URL: https://www.newstalk1030.com/news/wolfgang-van-halen-wrote-a-song-in-memory-of-dad-eddie-van-halen/ ---- 4-th Title: Wolfgang Van Halen wrote a song in memory of dad Eddie Van Halen 4-th URL: https://www.wqut.com/news/wolfgang-van-halen-wrote-a-song-in-memory-of-dad-eddie-van-halen/ ---- 5-th Title: Wolfgang Van Halen wrote a song in memory of dad Eddie Van Halen 5-th URL: https://www.khit1075.com/news/wolfgang-van-halen-wrote-a-song-in-memory-of-dad-eddie-van-halen/ ---- 6-th Title: Wolfgang Van Halen Was 'Ready' To Retire From Van Halen And Clear Way For Michael Anthony To Return 6-th URL: https://www.blabbermouth.net/news/wolfgang-van-halen-was-ready-to-retire-from-van-halen-and-clear-way-for-michael-anthony-to-return/ ---- 7-th Title: Wolfgang Van Halen Reveals Solo Single, 'Distance,' For His Late-Father 7-th URL: https://thebusfm.iheart.com/content/2020-11-16-wolfgang-van-halen-reveals-solo-single-distance-for-his-late-father/ ---- 8-th Title: Eddie Van Halen Hugged Members Of AC/DC The First Time They Met 8-th URL: 
https://theriver973.iheart.com/content/2020-11-17-eddie-van-halen-hugged-members-of-acdc-the-first-time-they-met/ ---- 9-th Title: Chuck Berry Radio: Listen to Free Music & Get The Latest Info 9-th URL: https://www.iheart.com/artist/chuck-berry-644/ ---- 10-th Title: Review: Ampline - Modern Fix 10-th URL: http://www.modernfix.com/reviews/cdampline/ ----